Here we use the dimRed package’s Isomap functionality to embed the data, picking the embedding dimension \(\hat{d}\) by the Zhu–Ghodsi (Z-G) elbow method applied to the singular values of the kernel matrix.
\(K_{\mathrm{isomap}} = -\tfrac{1}{2}\,(I - ee^T)\,S\,(I - ee^T)\), where \(S\) is the matrix of squared pairwise distances and \(e\) is the vector of ones scaled by \(1/\sqrt{n}\), so that \(ee^T\) has every entry equal to \(1/n\).
# Build the double-centred kernel K = -1/2 * H S H from squared distances.
# dist() is called with its default method, so S2 holds squared Euclidean
# distances between the rows of sdat.
n_obs <- nrow(sdat)
S2 <- as.matrix(dist(sdat))^2

# Unit-norm constant vector: every entry is 1 / sqrt(n).
e <- rep(1, n_obs) / sqrt(n_obs)

# Identity matrix of matching size (name kept for code that refers to it).
I793 <- diag(1, n_obs)

# Centering matrix H = I - e e^T, formed once and applied on both sides.
centering <- I793 - e %*% t(e)
Kiso <- -1/2 * (centering %*% S2 %*% centering)
# Embedding dimension: the third value returned by getElbows() on the
# singular values of the kernel matrix.
el3 <- getElbows(svd(Kiso)$d)[3]

# Isomap embedding of sdat into el3 dimensions using a 50-nearest-neighbour
# graph. The lines starting with "##" below are the console log recorded for
# this call in the rendered report.
emb <- embed(sdat, "Isomap", knn = 50, ndim = el3)
## 2018-02-08 23:40:58: Isomap START
## 2018-02-08 23:40:58: constructing knn graph
## 2018-02-08 23:40:58: calculating geodesic distances
## 2018-02-08 23:40:59: cmdscale
## 2018-02-08 23:41:00: post processing

# Extract the embedded coordinates from the nested S4 slots of the result.
edat <- emb@data@data
#pairs(emb@data@data, pch = 19, col = gaba + 1, cex = 0.2)
Here we restrict the hierarchical GMM to go through only one level. We compare the resulting clusters to the gaba labels.
# Hierarchical clustering of the embedded data; the seed is fixed so the
# fit is reproducible.
set.seed(3144)
h2 <- hmc(edat, maxDepth = 2, ccol = ccol)

# One viridis colour per cluster label, plus the per-observation labels.
h2lab <- viridis(max(h2$dat$labels$col))
h2col <- h2$dat$labels$col

# Stacked-means plot of the fitted hierarchy at depth 1.
stackM(h2, ccol = ccol, centered = TRUE, depth = 1)

# Point colours by gaba status: index 1 ("black") for 0, index 2 ("magenta")
# for 1.
# NOTE(review): this reads gabaID$gaba while the plotting and ARI code use a
# bare `gaba` vector -- confirm both hold the same labels in the same order.
cols <- c("black", "magenta")[gabaID$gaba+1]
# Semi-transparent versions of the gaba colours to reduce overplotting.
acols <- alpha(cols, 0.35)
#pairs(h2$dat$data, pch = 19, cex = 0.7, col = acols)

# Clustered data and the original features, coloured by gaba status; points
# with gaba == 1 use a different symbol (3) and a larger size.
plot(h2$dat$data, col = acols, pch = c(19,3)[gaba+1], cex = c(0.5,1)[gaba+1])
pairs(sdat, col = acols, pch = c(19,3)[gaba+1], cex = c(0.5,1)[gaba+1])

# The same two views, coloured by the cluster label assigned by hmc.
acols2 <- alpha(h2lab[h2$dat$labels$col], 0.45)
pairs(h2$dat$data, pch = 19, cex = 0.7, col = acols2)
pairs(sdat, pch = 19, cex = 0.7, col = acols2)

# Observed adjusted Rand index between the predicted labels and gaba.
p0 <- mclust::adjustedRandIndex(pred, gaba)
# Permutation null distribution of the ARI: shuffle the predicted labels and
# recompute the ARI against gaba. Each iteration sets its own seed so the
# result does not depend on how iterations are split across workers.
perms <- foreach(i = 1:1.5e4, .combine = c) %dopar% {
  set.seed(i*2)
  mclust::adjustedRandIndex(sample(pred), gaba)
}

# One-sided permutation p-value, (b + 1) / (m + 1): the observed statistic is
# counted in both the numerator and the denominator. Dividing by length(perms)
# alone, as before, could give a value greater than 1.
pv0 <- (sum(perms >= p0) + 1) / (length(perms) + 1)

# Histogram of the null ARIs with the observed value marked in red. The upper
# x-limit uses abs(p0) so it stays to the right of p0 even if p0 is negative.
hist(perms, xlim = c(min(perms), p0 + 0.25 * abs(p0)),
     main = "permutation test of ARI values", probability = TRUE)
#hist(perms, probability = TRUE)
abline(v = p0, col = 'red')

| measurement | value |
|---|---|
| Misclassification Rate | 0.2660782 |
| Accuracy | 0.7339218 |
| Sensitivity | 0.6823529 |
| Specificity | 0.740113 |
| Precision | 0.2396694 |
| Recall | 0.6823529 |
| ARI | 0.137717 |
| \(p\)-value for ARI | 0.000067 |
| F1-score | 0.3547401 |
| TP | 58 |
| FP | 184 |
| TN | 524 |
| FN | 27 |